{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a833abf1-7bee-4fde-a31f-6c8e5273cb5e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "import statsmodels.formula.api as smf\n",
    "import statsmodels.api as sm\n",
    "from statsmodels.miscmodels.ordinal_model import OrderedModel\n",
    "from linearmodels.panel import PanelOLS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "565ff7a9-c08a-4ed7-bd43-cf0f4788fb60",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_excel(r\"E:\\\\df_original.xlsx\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5055ddde-57eb-41e7-a672-dd430afb5e08",
   "metadata": {},
   "source": [
    "## 1. Figure 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5c9026fa-cade-4b6d-b136-9210e2a170ae",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Step 1: Ensure clean versions of key predictors\n",
    "df['welfare_for_aged'] = pd.to_numeric(df['welfare_for_aged'], errors='coerce')\n",
    "df['hard_on_north_korea'] = pd.to_numeric(df['hard_on_north_korea'], errors='coerce')\n",
    "\n",
    "df['income_quintile'] = pd.qcut(df['income'], q=5, labels=False) + 1\n",
    "\n",
    "# Create interaction terms\n",
    "df['treatment_welfare_lib'] = df['treatment_welfare'] * df['liberal_votes']\n",
    "df['treatment_northkorea_lib'] = df['treatment_northkorea'] * df['liberal_votes']\n",
    "\n",
    "# Step 2: Create dummy matrix\n",
    "controls = ['age', 'sex', 'education', 'income_quintile', 'homeownership', 'work_status', 'intention_to_vote']\n",
    "categorical_vars = ['living', 'religion', 'region']\n",
    "df_dummies = pd.get_dummies(df[categorical_vars], drop_first=True)\n",
    "X_controls = pd.concat([df[controls], df_dummies], axis=1)\n",
    "X_controls = X_controls.apply(pd.to_numeric, errors='coerce')\n",
    "\n",
    "# Step 3: Clean data for modeling\n",
    "model_data = pd.concat([\n",
    "    df[['welfare_for_aged', 'hard_on_north_korea', 'liberal_votes',\n",
    "        'treatment_welfare', 'treatment_welfare_lib',\n",
    "        'treatment_northkorea', 'treatment_northkorea_lib',\n",
    "        'presidential_election_preoutcome_2025', 'presidential_election_postoutcome_2025']],\n",
    "    X_controls\n",
    "], axis=1).dropna().astype(float)\n",
    "\n",
    "# Step 4: Define model fitting function\n",
    "def fit_logit_model(y_var, predictors, label):\n",
    "    y = model_data[y_var]\n",
    "    X = sm.add_constant(model_data[predictors])\n",
    "    model = sm.Logit(y, X).fit(disp=False)\n",
    "    coef = model.params\n",
    "    conf = model.conf_int()\n",
    "    conf.columns = ['ci_lower', 'ci_upper']\n",
    "    return pd.DataFrame({\n",
    "        'term': coef.index,\n",
    "        'odds_ratio': np.exp(coef),\n",
    "        'ci_lower': np.exp(conf['ci_lower']),\n",
    "        'ci_upper': np.exp(conf['ci_upper']),\n",
    "        'model': label\n",
    "    })\n",
    "\n",
    "# Step 5: Define models\n",
    "predictor_vars = list(X_controls.columns)\n",
    "model_1 = fit_logit_model('presidential_election_preoutcome_2025',\n",
    "                          ['liberal_votes'] + predictor_vars,\n",
    "                          '(a) DV: Pre-treatment outcome')\n",
    "\n",
    "model_2 = fit_logit_model('presidential_election_preoutcome_2025',\n",
    "                          ['welfare_for_aged', 'hard_on_north_korea'] + predictor_vars,\n",
    "                          '(b) DV: Pre-treatment outcome')\n",
    "\n",
    "model_3 = fit_logit_model('presidential_election_postoutcome_2025',\n",
    "                          ['liberal_votes', 'treatment_welfare', 'treatment_welfare_lib'] + predictor_vars,\n",
    "                          '(c) DV: Post-treatment outcome')\n",
    "\n",
    "model_4 = fit_logit_model('presidential_election_postoutcome_2025',\n",
    "                          ['liberal_votes', 'treatment_northkorea', 'treatment_northkorea_lib'] + predictor_vars,\n",
    "                          '(d) DV: Post-treatment outcome')\n",
    "\n",
    "# Step 6: Combine and filter for key terms\n",
    "results_df = pd.concat([model_1, model_2, model_3, model_4])\n",
    "keep_terms = ['liberal_votes', 'welfare_for_aged', 'hard_on_north_korea',\n",
    "              'treatment_welfare', 'treatment_northkorea',\n",
    "              'treatment_welfare_lib', 'treatment_northkorea_lib']\n",
    "results_df = results_df[results_df['term'].isin(keep_terms)]\n",
    "\n",
    "# Rename terms\n",
    "label_map = {\n",
    "    'liberal_votes': 'Previous Liberal vote count',\n",
    "    'welfare_for_aged': 'Welfare for senior citizens',\n",
    "    'hard_on_north_korea': 'Tough stance on North Korea',\n",
    "    'treatment_welfare': 'Welfare treatment',\n",
    "    'treatment_northkorea': 'North Korea treatment',\n",
    "    'treatment_welfare_lib': 'Liberal × Welfare',\n",
    "    'treatment_northkorea_lib': 'Liberal × North Korea'\n",
    "}\n",
    "results_df['term'] = results_df['term'].replace(label_map)\n",
    "\n",
    "# Step 7: Plot\n",
    "fig, axes = plt.subplots(2, 2, figsize=(14, 8))\n",
    "panel_titles = results_df['model'].unique()\n",
    "\n",
    "for idx, model_label in enumerate(panel_titles):\n",
    "    ax = axes[idx // 2, idx % 2]\n",
    "    subset = results_df[results_df['model'] == model_label].reset_index(drop=True)\n",
    "    for i, row in subset.iterrows():\n",
    "        ax.errorbar(\n",
    "            x=row['odds_ratio'], y=i,\n",
    "            xerr=[[row['odds_ratio'] - row['ci_lower']], [row['ci_upper'] - row['odds_ratio']]],\n",
    "            fmt='o', color='black', capsize=4, lw=1.5, label='95% CI' if i == 0 else \"\"\n",
    "        )\n",
    "        ax.text(row['odds_ratio'], i - 0.05, f\"{row['odds_ratio']:.4f}\",\n",
    "                fontsize=12, ha='center', va='bottom')\n",
    "    ax.axvline(1, color='red', linestyle='--', linewidth=0.8)\n",
    "    ax.set_yticks(range(len(subset)))\n",
    "    ax.set_yticklabels(subset['term'], fontsize=13)\n",
    "    ax.set_xlabel('Odds Ratio', fontsize=13)\n",
    "    ax.set_title(model_label, fontsize=17, pad=15)\n",
    "    ax.invert_yaxis()\n",
    "    ax.set_ylim(len(subset) - 0.5, -0.5)\n",
    "    ax.grid(True, linestyle='--', alpha=0.4)\n",
    "    ax.legend(loc='upper left', fontsize=13)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.savefig(\"fig_2.png\", dpi=600, bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5c563e4f-aa6e-48c0-bd35-3338a3076bd7",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "628f78f2-364d-49c9-9486-8f7b58d3592a",
   "metadata": {},
   "source": [
    "## 2. Figure 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e2c5b4f6-3801-4976-855c-5e58f86f9168",
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, axes = plt.subplots(1, 2, figsize=(14, 6), sharey=True)\n",
    "\n",
    "# Welfare Treatment Plot\n",
    "for group, style in zip(['Control', 'Welfare Treatment'], [('o', '-'), ('p', ':')]):\n",
    "    data = grid_welfare[grid_welfare['Group'] == group]\n",
    "    sns.lineplot(data=data, x='liberal_votes', y='predicted_prob',\n",
    "                 label=group, ax=axes[0], marker=style[0], linestyle=style[1], linewidth=2)\n",
    "\n",
    "axes[0].set_title('(a) Welfare Treatment Effect', fontsize=19, pad=15)\n",
    "axes[0].set_xlabel('Previous Liberal Vote Count', fontsize=16)\n",
    "axes[0].set_ylabel('Predicted Probability', fontsize=16)\n",
    "axes[0].tick_params(axis='both', labelsize=14)\n",
    "axes[0].legend(title='Group', fontsize=13, title_fontsize=14)\n",
    "\n",
    "# Add numeric labels\n",
    "for _, row in grid_welfare.iterrows():\n",
    "    axes[0].text(row['liberal_votes'], row['predicted_prob'] + 0.01,\n",
    "                 f\"{row['predicted_prob']:.2f}\", fontsize=11, ha='center', va='bottom')\n",
    "\n",
    "axes[0].grid(True, linestyle='--', alpha=0.5)\n",
    "\n",
    "# North Korea Treatment Plot\n",
    "for group, style in zip(['Control', 'North Korea Treatment'], [('o', '-'), ('p', ':')]):\n",
    "    data = grid_nk[grid_nk['Group'] == group]\n",
    "    sns.lineplot(data=data, x='liberal_votes', y='predicted_prob',\n",
    "                 label=group, ax=axes[1], marker=style[0], linestyle=style[1], linewidth=2)\n",
    "\n",
    "axes[1].set_title('(b) North Korea Treatment Effect', fontsize=19, pad=15)\n",
    "axes[1].set_xlabel('Previous Liberal Vote Count', fontsize=16)\n",
    "axes[1].set_ylabel('Predicted Probability', fontsize=16)\n",
    "axes[1].tick_params(axis='both', labelsize=14)\n",
    "axes[1].legend(title='Group', fontsize=13, title_fontsize=14)\n",
    "\n",
    "# Add numeric labels\n",
    "for _, row in grid_nk.iterrows():\n",
    "    axes[1].text(row['liberal_votes'], row['predicted_prob'] + 0.01,\n",
    "                 f\"{row['predicted_prob']:.2f}\", fontsize=11, ha='center', va='bottom')\n",
    "\n",
    "axes[1].grid(True, linestyle='--', alpha=0.5)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.savefig(\"fig_3.png\", dpi=600)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "705b69ff-3c92-4f0b-afe8-064b57f7b0ee",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
